/**
 * FileName: ExternalJsonFilterImpl
 * Author:   SAMSUNG-PC 孙中军
 * Date:     2019/2/14 11:17
 * Description: 外部配置文件的方式进行过滤,默认选择value列进行过滤
 */
package cn.com.bonc.filter.impl;

import cn.com.bonc.filter.Filter;
import cn.com.bonc.util.DataFilterAndOperatorUtil;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;

import java.io.IOException;

/**
 * {@link Filter} implementation that hands the actual filtering over to
 * {@link DataFilterAndOperatorUtil} and works on the {@code value} column.
 *
 * <p>The file header describes this as filtering driven by an external
 * configuration file; the rules themselves are not visible in this class.
 */
public class ExternalJsonFilterImpl implements Filter {

    /**
     * Casts the {@code value} column of the input to a string, runs the resulting
     * string dataset through {@code DataFilterAndOperatorUtil.getInstance().filter(...)},
     * and returns the {@code value} column of the filtered result.
     *
     * @param rowDataset input rows; the query references a column named {@code value}
     * @return the filtered data as rows containing the single column {@code value}
     */
    @Override
    public Dataset<Row> doFilter(Dataset<Row> rowDataset) {
        // Note: an inline length check (exactly 26 fields when split on '|') used to be
        // sketched here as a code-based alternative; filtering is delegated to the utility instead.
        Dataset<String> valueStrings = rowDataset
                .selectExpr("CAST(value AS STRING)")
                .as(Encoders.STRING());

        // select(...) already produces Dataset<Row> backed by the regular row encoder.
        // The previous trailing as(Encoders.bean(Row.class)) was dropped: Row is an
        // interface rather than a JavaBean, so a bean encoder cannot describe or build it.
        return DataFilterAndOperatorUtil
                .getInstance()
                .filter(valueStrings)
                .select("value");
    }
}
